In [1]:
import os
from bs4 import BeautifulSoup
import pandas as pd
In [2]:
# List the entries of the current working directory.
os.listdir(path=os.curdir)
Out[2]:
In [3]:
# Read the whole HTML document into memory as a single string.
# The context manager closes the handle as soon as the read finishes; the
# previous version left it open and overwrote the only reference to it.
# NOTE(review): the platform default encoding is used — confirm it matches the file.
with open('01_html_Einblick.htm', 'r') as html_handle:
    file = html_handle.read()
In [4]:
# Display the text currently bound to `file` as the cell output.
file
Out[4]:
In [5]:
# Parse the markup with the built-in 'html.parser' backend and display the result.
BeautifulSoup(markup=file, features='html.parser')
Out[5]:
In [6]:
# Keep the parsed document so later cells can query it.
file_soup = BeautifulSoup(markup=file, features='html.parser')
In [8]:
# Collect every <td> element of the parsed document, in document order.
lst = file_soup.find_all(name='td')
In [15]:
# Example of the target structure: a list of dicts becomes one DataFrame row
# per dict, with the dict keys used as column names.
pd.DataFrame(
    data=[
        {'Vorname': 'Markus', 'Nachname': 'Peters', 'Alter': 89},
        {'Vorname': 'Susanne', 'Nachname': 'Peters', 'Alter': 94},
    ]
)
Out[15]:
In [ ]:
# Plain list-of-dicts form of two example records (one dict per person).
[
    {
        'Vorname': 'Markus',
        'Nachname': 'Peters',
        'Alter': 89,
    },
    {
        'Vorname': 'Susanne',
        'Nachname': 'Peters',
        'Alter': 94,
    },
]
In [18]:
# Show the text content of the first <td> element.
lst[0].get_text()
Out[18]:
In [60]:
# Text content of every element in `lst`, in the same order.
# A comprehension replaces the manual append loop.
new_lst = [cell.text for cell in lst]
In [61]:
# Display the flat list of extracted cell texts.
new_lst
Out[61]:
In [62]:
# Preview every third entry, starting with the very first one.
new_lst[::3]
Out[62]:
In [63]:
# Every third entry starting at index 1, stored as the list of last names.
# NOTE(review): assumes each table row holds exactly three cells — confirm against the HTML.
nachnamen_liste = new_lst[1::3]
In [64]:
# Every third entry starting at index 2, stored as the list of ages.
# The values are whatever text the cells contained; no numeric conversion happens here.
alters_liste = new_lst[2::3]
In [65]:
# Every third entry starting at index 0, stored as the list of first names.
vornamen_liste = new_lst[::3]
In [66]:
# Display the list of first names.
vornamen_liste
Out[66]:
In [80]:
# Build one record (dict) per person from the three parallel lists.
# zip() stops at the shortest list, so entries without a counterpart in the
# other lists are silently dropped.
vn_list = [
    {'Vorname': vorname, 'Nachname': nachname, 'Alter': alter}
    for vorname, alter, nachname in zip(vornamen_liste, alters_liste, nachnamen_liste)
]
In [81]:
# Turn the list of records into a DataFrame (one row per record).
df = pd.DataFrame(data=vn_list)
In [82]:
# Display the DataFrame.
df
Out[82]:
In [ ]:
In [ ]:
In [ ]:
In [76]:
# Create or overwrite the 'Alter' column with the values from alters_liste.
# NOTE(review): this mutates df in place; the list length must equal len(df).
df['Alter'] = alters_liste
In [78]:
# Create or overwrite the last-name column from nachnamen_liste.
# The key is 'Nachname', matching the key used when the records are built
# elsewhere in this notebook; the former 'Nachnamen' spelling added a
# second, redundant column.
df['Nachname'] = nachnamen_liste
In [79]:
# Display the DataFrame after the column assignments.
df
Out[79]:
In [ ]:
In [50]:
# Create or overwrite the 'Alter' column with the extracted ages.
# The list is named alters_liste; the former name `alters_lst` is never
# defined in this notebook and raised NameError on a fresh kernel.
df['Alter'] = alters_liste
In [ ]:
In [45]:
# Display the DataFrame.
df
Out[45]:
In [ ]:
In [ ]:
In [18]:
# Collect every <td> element of the parsed document again, as Tag objects.
lst = file_soup.find_all(name='td')
In [20]:
# Print the text of each element in `lst` on its own line.
for cell in lst:
    print(cell.get_text())
In [22]:
# Every third element starting at index 0 (the slice used for 'Vorname' when the records are built).
lst[::3]
Out[22]:
In [23]:
# Every third element starting at index 1 (the slice used for 'Nachname' when the records are built).
lst[1::3]
Out[23]:
In [24]:
# Every third element starting at index 2 (the slice used for 'Alter' when the records are built).
lst[2::3]
Out[24]:
In [27]:
# Build one record per group of three consecutive elements of `lst`:
# offset 0 -> 'Vorname', offset 1 -> 'Nachname', offset 2 -> 'Alter'.
# zip() stops at the shortest slice, so a trailing incomplete group is dropped.
final_lst = [
    {'Vorname': first.text, 'Nachname': last.text, 'Alter': age.text}
    for first, last, age in zip(lst[::3], lst[1::3], lst[2::3])
]
In [28]:
# Render the collected records as a DataFrame (one row per record).
pd.DataFrame(data=final_lst)
Out[28]:
In [ ]: